#R version 4.2.1 (2022-06-23)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 10 x64 (build 19044)

# may need to unload some packages to make everything work 
#detach("package:ltm", unload=TRUE)
#detach("package:MASS", unload=TRUE)

library(dplyr) #version 1.0.9
library(readr) #version 2.1.2
library(stringr) #version 1.4.0

#install country standardization tool
library(devtools) #version 2.4.3
devtools::install_github("dsself/standardizecountries", force = T)
library(standard)

#inport v-dem data####
d1 <- read_rds("data/vdem_party.rds") %>% 
  mutate(countryname = country_panel(country_name, year)) %>% 
  select(countryname, year, v2pashname, v2paseatshare, v2panumbseat, v2patotalseat, v2pavote, 
         v2paclient, v2palocoff, v2paactcom, v2pasoctie, v2panom, v2padisa, v2paind, v2pagovsup) %>% 
  filter(v2pagovsup %in% c(0,1))

#gwf####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, gwf_casename, gwf_regimetype, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(gwf_regimetype, "military"))

gwf <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

gwf_dffa <- select(gwf, gwf_casename, year, v2paseatshare, v2pavote, v2pssunpar) %>% 
  na.omit() %>% unique()

gwf_fa <- select(gwf_dffa, v2paseatshare, v2pavote, v2pssunpar) 

fit_gwf <- factanal(gwf_fa, 1, rotation="varimax", scores = "regression")

out_gwf <- as_tibble(fit_gwf$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(gwf_incumbent_strength = Factor1)

gwf_fa <- cbind(gwf_dffa, out_gwf) %>% 
  select(gwf_casename, year, gwf_incumbent_strength) 

gwf_final <- left_join(gwf, gwf_fa, by = c("gwf_casename", "year")) %>% 
  group_by(gwf_casename, year) %>% 
  mutate_at(.vars = vars(gwf_incumbent_strength), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(gwf_casename), str_detect(gwf_regimetype, "military")) %>%
  select(gwf_casename, year, gwf_incumbent_strength) %>% 
  unique() %>% arrange(desc(gwf_incumbent_strength)) %>% 
  write_csv("data/gwf_fa_strength.csv")

#dd####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, dd_case, dd_regime, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(dd_regime, "military"))

dd <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

dd_dffa <- select(dd, dd_case, year, v2paseatshare, v2pavote, v2pssunpar) %>% 
  na.omit() %>% unique()

dd_fa <- select(dd_dffa, v2paseatshare, v2pavote, v2pssunpar) 

fit_dd <- factanal(dd_fa, 1, rotation="varimax", scores = "regression")

out_dd <- as_tibble(fit_dd$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(dd_incumbent_strength = Factor1)

dd_fa <- cbind(dd_dffa, out_dd) %>% 
  select(dd_case, year, dd_incumbent_strength) 

dd_final <- left_join(dd, dd_fa, by = c("dd_case", "year")) %>% 
  group_by(dd_case, year) %>% 
  mutate_at(.vars = vars(dd_incumbent_strength), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(dd_case), str_detect(dd_regime, "military")) %>%
  select(dd_case, year, dd_incumbent_strength) %>% 
  unique() %>% arrange(desc(dd_incumbent_strength)) %>% 
  write_csv("data/dd_fa_strength.csv")

#par####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, svolik_case, svolik_regime, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(svolik_regime, "military"))

svolik <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

svolik_dffa <- select(svolik, svolik_case, year, v2paseatshare, v2pavote, v2pssunpar) %>% 
  na.omit() %>% unique()

svolik_fa <- select(svolik_dffa, v2paseatshare, v2pavote, v2pssunpar) 

fit_svolik <- factanal(svolik_fa, 1, rotation="varimax", scores = "regression")

out_svolik <- as_tibble(fit_svolik$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(svolik_incumbent_strength = Factor1)

svolik_fa <- cbind(svolik_dffa, out_svolik) %>% 
  select(svolik_case, year, svolik_incumbent_strength) 

svolik_final <- left_join(svolik, svolik_fa, by = c("svolik_case", "year")) %>% 
  group_by(svolik_case, year) %>% 
  mutate_at(.vars = vars(svolik_incumbent_strength), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(svolik_case), str_detect(svolik_regime, "military")) %>%
  select(svolik_case, year, svolik_incumbent_strength) %>% 
  unique() %>% arrange(desc(svolik_incumbent_strength)) %>% 
  write_csv("data/svolik_fa_strength.csv")

#wth####
d2 <- read_csv("data/panel.csv") %>% 
  select(countryname, year, wth_case, wth_regime, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(wth_regime, "military"))

wth <- left_join(d2, d1, by = c("countryname", "year")) %>% 
  filter(!is.na(v2pashname))

wth_dffa <- select(wth, wth_case, year, v2paseatshare, v2pavote, v2pssunpar) %>% 
  na.omit() %>% unique()

wth_fa <- select(wth_dffa, v2paseatshare, v2pavote, v2pssunpar) 

fit_wth <- factanal(wth_fa, 1, rotation="varimax", scores = "regression")

out_wth <- as_tibble(fit_wth$scores) %>% 
  mutate(Factor1 = ((Factor1 - min(Factor1, na.rm = T))/(max(Factor1, na.rm = T)-min(Factor1, na.rm = T)))) %>% 
  rename(wth_incumbent_strength = Factor1)

wth_fa <- cbind(wth_dffa, out_wth) %>% 
  select(wth_case, year, wth_incumbent_strength) 

wth_final <- left_join(wth, wth_fa, by = c("wth_case", "year")) %>% 
  group_by(wth_case, year) %>% 
  mutate_at(.vars = vars(wth_incumbent_strength), ~mean(., na.rm = T)) %>% 
  unique() %>% 
  mutate_if(is.numeric, ~ifelse(is.nan(.), NA, .)) %>% 
  ungroup() %>% 
  filter(!is.na(wth_case), str_detect(wth_regime, "military")) %>%
  select(wth_case, year, wth_incumbent_strength) %>% 
  unique() %>% arrange(desc(wth_incumbent_strength)) %>% 
  write_csv("data/wth_fa_strength.csv")


#validation####

gwf_m <- select(gwf_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

gwf_sd <- select(gwf_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

svolik_m <- select(svolik_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

svolik_sd <- select(svolik_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

dd_m <- select(dd_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

dd_sd <- select(dd_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

wth_m <- select(wth_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~mean(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

wth_sd <- select(wth_dffa, v2paseatshare, v2pavote, v2pssunpar) %>% 
  summarise_all(~sd(., na.rm = T)) %>% 
  mutate_if(is.numeric, ~round(., 2))

all_means <- rbind(gwf_m, svolik_m, dd_m, wth_m) %>% 
  mutate(dataset = c("GWF", "PAR", "DD", "WTH")) %>% 
  select(dataset, v2paseatshare, v2pavote, v2pssunpar)

all_sd <- rbind(gwf_sd, svolik_sd, dd_sd, wth_sd) %>% 
  mutate(dataset = c("GWF", "PAR", "DD", "WTH")) %>% 
  select(dataset, v2paseatshare, v2pavote, v2pssunpar)

stargazer(all_means, summary = F)

#plots ####
library(scales)
gwf <- read_csv("data/panel.csv") %>% 
  select(countryname, gwf_casename, year, gwf_regimetype, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(gwf_regimetype, "military")) %>% 
  left_join(gwf_final, by = c("gwf_casename", "year")) %>% 
  group_by(gwf_casename) %>% 
  mutate_at(.vars = vars(gwf_incumbent_strength), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(gwf_incumbent_strength1 = ifelse(is.na(gwf_incumbent_strength), v2pssunpar, gwf_incumbent_strength)) %>% 
  mutate(v2pssunpar = ifelse(is.na(v2pssunpar), 0, v2pssunpar)) %>% 
  mutate(gwf_incumbent_strength2 = ifelse(is.na(gwf_incumbent_strength), v2pssunpar, gwf_incumbent_strength)) 

gwf1 <- ggplot(gwf, aes(x = gwf_incumbent_strength1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: GWF \n Percentage") + 
  xlab("") +
  scale_y_continuous(labels = percent)

par <- read_csv("data/panel.csv") %>% 
  select(countryname, svolik_case, year, svolik_regime, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(svolik_regime, "military")) %>% 
  left_join(svolik_final, by = c("svolik_case", "year")) %>% 
  group_by(svolik_case) %>% 
  mutate_at(.vars = vars(svolik_incumbent_strength), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(svolik_incumbent_strength1 = ifelse(is.na(svolik_incumbent_strength), v2pssunpar, svolik_incumbent_strength)) %>% 
  mutate(v2pssunpar = ifelse(is.na(v2pssunpar), 0, v2pssunpar)) %>% 
  mutate(svolik_incumbent_strength2 = ifelse(is.na(svolik_incumbent_strength), v2pssunpar, svolik_incumbent_strength)) 

par1 <- ggplot(par, aes(x = svolik_incumbent_strength1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: PAR") + 
  xlab("") +
  scale_y_continuous(labels = percent)

dd <- read_csv("data/panel.csv") %>% 
  select(countryname, dd_case, year, dd_regime, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(dd_regime, "military")) %>% 
  left_join(dd_final, by = c("dd_case", "year")) %>% 
  group_by(dd_case) %>% 
  mutate_at(.vars = vars(dd_incumbent_strength), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(dd_incumbent_strength1 = ifelse(is.na(dd_incumbent_strength), v2pssunpar, dd_incumbent_strength)) %>% 
  mutate(v2pssunpar = ifelse(is.na(v2pssunpar), 0, v2pssunpar)) %>% 
  mutate(dd_incumbent_strength2 = ifelse(is.na(dd_incumbent_strength), v2pssunpar, dd_incumbent_strength)) 

dd1 <- ggplot(dd, aes(x = dd_incumbent_strength1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: DD \n Percentage") + 
  xlab("Party Strength") +
  scale_y_continuous(labels = percent)

wth <- read_csv("data/panel.csv") %>% 
  select(countryname, wth_case, year, wth_regime, v2pssunpar) %>% 
  mutate(v2pssunpar = ((v2pssunpar - min(v2pssunpar, na.rm = T))/(max(v2pssunpar, na.rm = T)-min(v2pssunpar, na.rm = T)))) %>% 
  filter(str_detect(wth_regime, "military")) %>% 
  left_join(wth_final, by = c("wth_case", "year")) %>% 
  group_by(wth_case) %>% 
  mutate_at(.vars = vars(wth_incumbent_strength), ~na.locf0(.)) %>% 
  group_by(countryname) %>% 
  mutate(wth_incumbent_strength1 = ifelse(is.na(wth_incumbent_strength), v2pssunpar, wth_incumbent_strength)) %>% 
  mutate(v2pssunpar = ifelse(is.na(v2pssunpar), 0, v2pssunpar)) %>% 
  mutate(wth_incumbent_strength2 = ifelse(is.na(wth_incumbent_strength), v2pssunpar, wth_incumbent_strength)) 

wth1 <- ggplot(wth, aes(x = wth_incumbent_strength1)) +  
  geom_histogram(aes(y = (..count..)/sum(..count..))) +
  stat_bin(aes(y=(..count..)/sum(..count..))) +
  theme_bw() + 
  ylab("Dataset: WTH") + 
  xlab("Party Strength") +
  scale_y_continuous(labels = percent)

ggarrange(gwf1, par1, dd1, wth1, ncol = 2, nrow = 2) +
  ggsave(filename = "figures/appendix_strength1.jpg", dpi = 1000, width = 6.5, height = 6.5)  

